#encoding=utf-8

from  __future__ import print_function

import pandas as pd

# Path to the bank-loan spreadsheet, relative to the current working directory.
filename = '../dataSets/chapter5/data/bankloan.xls'

data = pd.read_excel(filename)

# Split by column position: the first eight columns become the feature matrix
# and the ninth column becomes the target vector.
# `.values` replaces `as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; both return the underlying NumPy array.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

# print(data)
# print(data.iloc[:,:8])
# print(data.iloc[:,8])
# print(x)
# print(y)

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Select features with randomized logistic regression.
# NOTE(review): RandomizedLogisticRegression was deprecated in scikit-learn
# 0.19 and removed in 0.21, so this step requires an older scikit-learn.
rlr = RLR()
rlr.fit(x, y)
# Boolean mask of the selected features; computed once and reused below
# instead of calling get_support() repeatedly.
support = rlr.get_support()

print('fit end')

# Drop the target column so the remaining columns can be indexed by the mask.
# Assumes u'违约' is the ninth column used as y above and that the sheet has
# no further columns, otherwise the mask length will not match -- confirm.
data = data.drop(u'违约', axis=1)
selected_columns = data.columns[support]
print(u'有效特征为: %s' % ','.join(selected_columns))
# `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
x = data[selected_columns].values

lr = LR()
lr.fit(x, y)
print('fit end')
# The score is computed on the same data the model was fitted on, so it is a
# training-set figure rather than a held-out estimate.
print(u'模型平均正确率为: %s' % lr.score(x, y))
